import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings
filterwarnings("ignore")
import plotly.express as px
%matplotlib inline
sns.set_style('whitegrid')
plt.style.use('fivethirtyeight')
D:\anaconda files\lib\site-packages\scipy\__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.25.0 is required for this version of SciPy (detected version 1.26.4
warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
data=pd.read_csv(r"C:\\Users\\laxma\\Downloads\\KAG_conversion_data.csv")
data
| ad_id | xyz_campaign_id | fb_campaign_id | age | gender | interest | Impressions | Clicks | Spent | Total_Conversion | Approved_Conversion | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 708746 | 916 | 103916 | 30-34 | M | 15 | 7350 | 1 | 1.430000 | 2 | 1 |
| 1 | 708749 | 916 | 103917 | 30-34 | M | 16 | 17861 | 2 | 1.820000 | 2 | 0 |
| 2 | 708771 | 916 | 103920 | 30-34 | M | 20 | 693 | 0 | 0.000000 | 1 | 0 |
| 3 | 708815 | 916 | 103928 | 30-34 | M | 28 | 4259 | 1 | 1.250000 | 1 | 0 |
| 4 | 708818 | 916 | 103928 | 30-34 | M | 28 | 4133 | 1 | 1.290000 | 1 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1138 | 1314410 | 1178 | 179977 | 45-49 | F | 109 | 1129773 | 252 | 358.189997 | 13 | 2 |
| 1139 | 1314411 | 1178 | 179978 | 45-49 | F | 110 | 637549 | 120 | 173.880003 | 3 | 0 |
| 1140 | 1314412 | 1178 | 179979 | 45-49 | F | 111 | 151531 | 28 | 40.289999 | 2 | 0 |
| 1141 | 1314414 | 1178 | 179981 | 45-49 | F | 113 | 790253 | 135 | 198.710001 | 8 | 2 |
| 1142 | 1314415 | 1178 | 179982 | 45-49 | F | 114 | 513161 | 114 | 165.609999 | 5 | 2 |
1143 rows × 11 columns
data.head()
| ad_id | xyz_campaign_id | fb_campaign_id | age | gender | interest | Impressions | Clicks | Spent | Total_Conversion | Approved_Conversion | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 708746 | 916 | 103916 | 30-34 | M | 15 | 7350 | 1 | 1.43 | 2 | 1 |
| 1 | 708749 | 916 | 103917 | 30-34 | M | 16 | 17861 | 2 | 1.82 | 2 | 0 |
| 2 | 708771 | 916 | 103920 | 30-34 | M | 20 | 693 | 0 | 0.00 | 1 | 0 |
| 3 | 708815 | 916 | 103928 | 30-34 | M | 28 | 4259 | 1 | 1.25 | 1 | 0 |
| 4 | 708818 | 916 | 103928 | 30-34 | M | 28 | 4133 | 1 | 1.29 | 1 | 1 |
data.tail()
| ad_id | xyz_campaign_id | fb_campaign_id | age | gender | interest | Impressions | Clicks | Spent | Total_Conversion | Approved_Conversion | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1138 | 1314410 | 1178 | 179977 | 45-49 | F | 109 | 1129773 | 252 | 358.189997 | 13 | 2 |
| 1139 | 1314411 | 1178 | 179978 | 45-49 | F | 110 | 637549 | 120 | 173.880003 | 3 | 0 |
| 1140 | 1314412 | 1178 | 179979 | 45-49 | F | 111 | 151531 | 28 | 40.289999 | 2 | 0 |
| 1141 | 1314414 | 1178 | 179981 | 45-49 | F | 113 | 790253 | 135 | 198.710001 | 8 | 2 |
| 1142 | 1314415 | 1178 | 179982 | 45-49 | F | 114 | 513161 | 114 | 165.609999 | 5 | 2 |
data.describe()
| ad_id | xyz_campaign_id | fb_campaign_id | interest | Impressions | Clicks | Spent | Total_Conversion | Approved_Conversion | |
|---|---|---|---|---|---|---|---|---|---|
| count | 1.143000e+03 | 1143.000000 | 1143.000000 | 1143.000000 | 1.143000e+03 | 1143.000000 | 1143.000000 | 1143.000000 | 1143.000000 |
| mean | 9.872611e+05 | 1067.382327 | 133783.989501 | 32.766404 | 1.867321e+05 | 33.390201 | 51.360656 | 2.855643 | 0.944007 |
| std | 1.939928e+05 | 121.629393 | 20500.308622 | 26.952131 | 3.127622e+05 | 56.892438 | 86.908418 | 4.483593 | 1.737708 |
| min | 7.087460e+05 | 916.000000 | 103916.000000 | 2.000000 | 8.700000e+01 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 7.776325e+05 | 936.000000 | 115716.000000 | 16.000000 | 6.503500e+03 | 1.000000 | 1.480000 | 1.000000 | 0.000000 |
| 50% | 1.121185e+06 | 1178.000000 | 144549.000000 | 25.000000 | 5.150900e+04 | 8.000000 | 12.370000 | 1.000000 | 1.000000 |
| 75% | 1.121804e+06 | 1178.000000 | 144657.500000 | 31.000000 | 2.217690e+05 | 37.500000 | 60.025000 | 3.000000 | 1.000000 |
| max | 1.314415e+06 | 1178.000000 | 179982.000000 | 114.000000 | 3.052003e+06 | 421.000000 | 639.949998 | 60.000000 | 21.000000 |
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1143 entries, 0 to 1142 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ad_id 1143 non-null int64 1 xyz_campaign_id 1143 non-null int64 2 fb_campaign_id 1143 non-null int64 3 age 1143 non-null object 4 gender 1143 non-null object 5 interest 1143 non-null int64 6 Impressions 1143 non-null int64 7 Clicks 1143 non-null int64 8 Spent 1143 non-null float64 9 Total_Conversion 1143 non-null int64 10 Approved_Conversion 1143 non-null int64 dtypes: float64(1), int64(8), object(2) memory usage: 98.4+ KB
data.isnull().sum()
ad_id 0 xyz_campaign_id 0 fb_campaign_id 0 age 0 gender 0 interest 0 Impressions 0 Clicks 0 Spent 0 Total_Conversion 0 Approved_Conversion 0 dtype: int64
data=data.dropna()
data
| ad_id | xyz_campaign_id | fb_campaign_id | age | gender | interest | Impressions | Clicks | Spent | Total_Conversion | Approved_Conversion | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 708746 | 916 | 103916 | 30-34 | M | 15 | 7350 | 1 | 1.430000 | 2 | 1 |
| 1 | 708749 | 916 | 103917 | 30-34 | M | 16 | 17861 | 2 | 1.820000 | 2 | 0 |
| 2 | 708771 | 916 | 103920 | 30-34 | M | 20 | 693 | 0 | 0.000000 | 1 | 0 |
| 3 | 708815 | 916 | 103928 | 30-34 | M | 28 | 4259 | 1 | 1.250000 | 1 | 0 |
| 4 | 708818 | 916 | 103928 | 30-34 | M | 28 | 4133 | 1 | 1.290000 | 1 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1138 | 1314410 | 1178 | 179977 | 45-49 | F | 109 | 1129773 | 252 | 358.189997 | 13 | 2 |
| 1139 | 1314411 | 1178 | 179978 | 45-49 | F | 110 | 637549 | 120 | 173.880003 | 3 | 0 |
| 1140 | 1314412 | 1178 | 179979 | 45-49 | F | 111 | 151531 | 28 | 40.289999 | 2 | 0 |
| 1141 | 1314414 | 1178 | 179981 | 45-49 | F | 113 | 790253 | 135 | 198.710001 | 8 | 2 |
| 1142 | 1314415 | 1178 | 179982 | 45-49 | F | 114 | 513161 | 114 | 165.609999 | 5 | 2 |
1143 rows × 11 columns
data.isnull().sum()
ad_id 0 xyz_campaign_id 0 fb_campaign_id 0 age 0 gender 0 interest 0 Impressions 0 Clicks 0 Spent 0 Total_Conversion 0 Approved_Conversion 0 dtype: int64
data.duplicated().sum()
0
#VISUALIZATION
plt.bar(data['gender'],data['Total_Conversion'])
plt.xticks(rotation=90)
plt.show()
fig=px.bar(data,x='interest',y='Impressions',color='interest')
fig.show()
fig=px.violin(data,x='Approved_Conversion',y='fb_campaign_id',color='Approved_Conversion')
fig.show()
fig=px.bar(data,x='Total_Conversion',y='Clicks',color='Clicks')
fig.show()
plt.scatter(data['Spent'],data['Total_Conversion'],color='red')
plt.xticks(rotation=90)
plt.show()
plt.figure(figsize=(10,4))
sns.countplot(x='interest', data=data, color='cyan')
plt.xticks(rotation=90)
plt.show()
sns.barplot(data['Approved_Conversion'],data['fb_campaign_id'],color='r')
plt.xticks(rotation=90)
plt.show()
sns.lineplot(x='Approved_Conversion', y='ad_id', data=data).set_title('Approved_Conversion by ad_id')
Text(0.5, 1.0, 'Approved_Conversion by ad_id')
plt.figure(figsize=(8, 6))
data.age.hist(bins=data.age.nunique())
plt.xlabel('age')
Text(0.5, 0, 'age')
sns.relplot(x='Clicks',y='ad_id',data=data)
<seaborn.axisgrid.FacetGrid at 0x1e0e68876a0>
sns.displot(data["age"])
<seaborn.axisgrid.FacetGrid at 0x1e0e68b3760>
plt.figure(figsize=(8, 6))
data.age.hist(bins=data.age.nunique())
plt.xlabel('age')
Text(0.5, 0, 'age')
plt.figure(figsize=(8,6))
sns.jointplot(x=data["Spent"],y=data['age'])
<seaborn.axisgrid.JointGrid at 0x1e0e64a6a30>
<Figure size 800x600 with 0 Axes>
plt.figure(figsize=(8,6))
sns.jointplot(x=data["interest"],y=data.Total_Conversion, kind='kde')
<seaborn.axisgrid.JointGrid at 0x1e0e64c4040>
<Figure size 800x600 with 0 Axes>
plt.figure(figsize=(8,6))
sns.jointplot(x=data["fb_campaign_id"],y=data["Approved_Conversion"])
<seaborn.axisgrid.JointGrid at 0x1e0e6570cd0>
<Figure size 800x600 with 0 Axes>
sns.pairplot(data)
<seaborn.axisgrid.PairGrid at 0x1e0e3b44f10>
#MODEL BUILDING
data['Clicks'].value_counts()
0 207
1 119
2 70
3 51
4 34
...
421 1
119 1
111 1
45 1
252 1
Name: Clicks, Length: 183, dtype: int64
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
def print_score(clf, X_train, y_train, X_test, y_test, train=True):
if train:
pred=clf.predict(X_train)
clf_report=pd.DataFrame(classification_report(y_train, pred, output_dict=True))
print("train result:\n")
print(f"accuracy score:{accuracy_score(y_train,pred)*100:.2f}%")
print(f"CLASSIFICATION REPORT:\n,{clf_report}")
print(f"\nconfusion matrix:\n{confusion_matrix(y_train,pred)}\n")
elif train==False:
pred=clf.predict(X_test)
clf_report=pd.DataFrame(classification_report(y_test, pred, output_dict=True))
print("train result:\n")
print(f"accuracy score:{accuracy_score(y_test,pred)*100:.2f}%")
print(f"CLASSIFICATION REPORT:\n,{clf_report}")
print(f"\nconfusion matrix:\n{confusion_matrix(y_test,pred)}\n")
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OrdinalEncoder
from sklearn.compose import make_column_transformer
from sklearn.model_selection import train_test_split
X = data.drop(['ad_id', 'xyz_campaign_id', 'fb_campaign_id', 'age', 'gender'], axis=1)
y = data['Clicks']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
num_columns = ['interest', 'Clicks', 'Approved_Conversion', 'Total_Conversion', 'Impressions']
ct = make_column_transformer(
(MinMaxScaler(), num_columns),
(StandardScaler(), num_columns),
remainder = 'passthrough'
)
X_train = ct.fit_transform(X_train)
X_test = ct.transform(X_test)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
lr_clf=LogisticRegression(solver='liblinear')
lr_clf.fit(X_train, y_train)
print_score(lr_clf, X_train, y_train, X_test, y_test, train=True)
print_score(lr_clf, X_train, y_train, X_test, y_test, train=False)
train result:
accuracy score:30.88%
CLASSIFICATION REPORT:
, 0 1 2 3 4 5 6 7 \
precision 0.965753 0.420455 0.0 0.0 0.0 0.097561 0.0 0.0
recall 1.000000 0.936709 0.0 0.0 0.0 0.148148 0.0 0.0
f1-score 0.982578 0.580392 0.0 0.0 0.0 0.117647 0.0 0.0
support 141.000000 79.000000 49.0 38.0 25.0 27.000000 12.0 21.0
8 9 ... 272 276 282 295 346 353 421 \
precision 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.333333 0.333333
recall 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 1.000000 1.000000
f1-score 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.500000 0.500000
support 10.0 7.0 ... 1.0 1.0 1.0 1.0 1.0 1.000000 1.000000
accuracy macro avg weighted avg
precision 0.30875 0.056544 0.238131
recall 0.30875 0.088461 0.308750
f1-score 0.30875 0.056183 0.254989
support 0.30875 800.000000 800.000000
[4 rows x 161 columns]
confusion matrix:
[[141 0 0 ... 0 0 0]
[ 5 74 0 ... 0 0 0]
[ 0 48 0 ... 0 0 0]
...
[ 0 0 0 ... 0 0 0]
[ 0 0 0 ... 0 1 0]
[ 0 0 0 ... 0 0 1]]
train result:
accuracy score:30.03%
CLASSIFICATION REPORT:
, 0 1 2 3 4 5 6 7 8 \
precision 0.929577 0.492958 0.0 0.0 0.0 0.055556 0.0 0.0 0.0
recall 1.000000 0.875000 0.0 0.0 0.0 0.142857 0.0 0.0 0.0
f1-score 0.963504 0.630631 0.0 0.0 0.0 0.080000 0.0 0.0 0.0
support 66.000000 40.000000 21.0 13.0 9.0 7.000000 6.0 8.0 4.0
9 ... 233 235 245 247 340 353 367 accuracy \
precision 1.000000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.300292
recall 0.100000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.300292
f1-score 0.181818 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.300292
support 10.000000 ... 1.0 1.0 1.0 1.0 1.0 0.0 1.0 0.300292
macro avg weighted avg
precision 0.022945 0.266645
recall 0.019610 0.300292
f1-score 0.017185 0.265874
support 343.000000 343.000000
[4 rows x 111 columns]
confusion matrix:
[[66 0 0 ... 0 0 0]
[ 5 35 0 ... 0 0 0]
[ 0 18 0 ... 0 0 0]
...
[ 0 0 0 ... 0 0 0]
[ 0 0 0 ... 0 0 0]
[ 0 0 0 ... 0 0 0]]